';
for (let info of saveRequestInfo) {
content +=
`
${info.key}
${info.value}
`;
}
content += '
';
}
$('.swh-popover').html(content);
$(event.target).popover('update');
});
}
export function fillSaveRequestFormAndScroll(visitType, originUrl) {
  // Pre-fill the origin URL input with the requested origin
  $('#swh-input-origin-url').val(originUrl);
  // First pass: try to infer the visit type from the origin URL itself,
  // selecting every option whose value appears in the URL
  let matchedFromUrl = false;
  $('#swh-input-visit-type option').each(function() {
    const optionValue = $(this).val();
    if (optionValue && originUrl.includes(optionValue)) {
      $(this).prop('selected', true);
      matchedFromUrl = true;
    }
  });
  // Fallback: no option value matched the URL, select the option
  // equal to the provided visit type
  if (!matchedFromUrl) {
    $('#swh-input-visit-type option').each(function() {
      if ($(this).val() === visitType) {
        $(this).prop('selected', true);
      }
    });
  }
  // Bring the save request form back into view at the top of the page
  window.scrollTo(0, 0);
}
diff --git a/cypress/integration/origin-save.spec.js b/cypress/integration/origin-save.spec.js
index 4e2abf8e..7db35c31 100644
--- a/cypress/integration/origin-save.spec.js
+++ b/cypress/integration/origin-save.spec.js
@@ -1,475 +1,554 @@
/**
* Copyright (C) 2019-2021 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
let url;
let origin;
const $ = Cypress.$;
const saveCodeMsg = {
'success': 'The "save code now" request has been accepted and will be processed as soon as possible.',
'warning': 'The "save code now" request has been put in pending state and may be accepted for processing after manual review.',
'rejected': 'The "save code now" request has been rejected because the provided origin url is blacklisted.',
'rateLimit': 'The rate limit for "save code now" requests has been reached. Please try again later.',
'not-found': 'The provided url does not exist',
'unknownError': 'An unexpected error happened when submitting the "save code now request',
'csrfError': 'CSRF Failed: Referrer checking failed - no Referrer.'
};
+const anonymousVisitTypes = ['git', 'hg', 'svn'];
+const allVisitTypes = ['bundle', 'git', 'hg', 'svn'];
+
// Fill in the save request form with the given origin info and submit it
function makeOriginSaveRequest(originType, originUrl) {
  cy.get('#swh-input-origin-url')
    .type(originUrl);
  cy.get('#swh-input-visit-type')
    .select(originType);
  cy.get('#swh-save-origin-form')
    .submit();
}
// Assert that the save request status area shows a visible alert of the
// expected bootstrap type containing the expected message
function checkAlertVisible(alertType, msg) {
  cy.get('#swh-origin-save-request-status').should('be.visible');
  cy.get('#swh-origin-save-request-status')
    .find(`.alert-${alertType}`)
    .should('be.visible')
    .and('contain', msg);
}
// Stub requests to save an origin
// Stub requests to save an origin
function stubSaveRequest({
  requestUrl,
  visitType = 'git',
  saveRequestStatus,
  originUrl,
  saveTaskStatus,
  responseStatus = 200,
  // For error code with the error message in the 'reason' key response
  errorMessage = '',
  saveRequestDate = new Date(),
  visitDate = new Date(),
  visitStatus = null
} = {}) {
  // An error response carries the message under the 'reason' key;
  // otherwise a regular save request response body is generated
  const isErrorResponse = responseStatus !== 200 && errorMessage;
  const response = isErrorResponse
    ? {'reason': errorMessage}
    : genOriginSaveResponse({visitType: visitType,
                             saveRequestStatus: saveRequestStatus,
                             originUrl: originUrl,
                             saveRequestDate: saveRequestDate,
                             saveTaskStatus: saveTaskStatus,
                             visitDate: visitDate,
                             visitStatus: visitStatus
                            });
  cy.intercept('POST', requestUrl, {body: response, statusCode: responseStatus})
    .as('saveRequest');
}
// Mocks API response : /save/(:visit_type)/(:origin_url)
// visit_type : {'git', 'hg', 'svn', ...}
// Mocks API response : /save/(:visit_type)/(:origin_url)
// visit_type : {'git', 'hg', 'svn', ...}
//
// Builds a save request response object as returned by the API:
// dates are serialized to ISO strings (or null when not provided),
// the request id is a fixed placeholder value.
function genOriginSaveResponse({
  visitType = 'git',
  saveRequestStatus,
  originUrl,
  saveRequestDate = new Date(),
  saveTaskStatus,
  visitDate = new Date(),
  visitStatus
} = {}) {
  return {
    'visit_type': visitType,
    'save_request_status': saveRequestStatus,
    'origin_url': originUrl,
    'id': 1,
    'save_request_date': saveRequestDate ? saveRequestDate.toISOString() : null,
    'save_task_status': saveTaskStatus,
    'visit_date': visitDate ? visitDate.toISOString() : null,
    'visit_status': visitStatus
  };
}
describe('Origin Save Tests', function() {
before(function() {
url = this.Urls.origin_save();
origin = this.origin[0];
this.originSaveUrl = this.Urls.api_1_save_origin(origin.type, origin.url);
});
beforeEach(function() {
cy.fixture('origin-save').as('originSaveJSON');
cy.fixture('save-task-info').as('saveTaskInfoJSON');
cy.visit(url);
});
it('should format appropriately values depending on their type', function() {
let inputValues = [ // null values stay null
{type: 'json', value: null, expectedValue: null},
{type: 'date', value: null, expectedValue: null},
{type: 'raw', value: null, expectedValue: null},
{type: 'duration', value: null, expectedValue: null},
// non null values formatted depending on their type
{type: 'json', value: '{}', expectedValue: '"{}"'},
{type: 'date', value: '04/04/2021 01:00:00', expectedValue: '4/4/2021, 1:00:00 AM'},
{type: 'raw', value: 'value-for-identity', expectedValue: 'value-for-identity'},
{type: 'duration', value: '10', expectedValue: '10 seconds'},
{type: 'duration', value: 100, expectedValue: '100 seconds'}
];
cy.window().then(win => {
inputValues.forEach(function(input, index, array) {
let actualValue = win.swh.save.formatValuePerType(input.type, input.value);
assert.equal(actualValue, input.expectedValue);
});
});
});
it('should display accepted message when accepted', function() {
stubSaveRequest({requestUrl: this.originSaveUrl,
saveRequestStatus: 'accepted',
originUrl: origin.url,
saveTaskStatus: 'not yet scheduled'});
makeOriginSaveRequest(origin.type, origin.url);
cy.wait('@saveRequest').then(() => {
checkAlertVisible('success', saveCodeMsg['success']);
});
});
it('should validate gitlab subproject url', function() {
  const gitlabSubProjectUrl = 'https://gitlab.com/user/project/sub/';
  const originSaveUrl = this.Urls.api_1_save_origin('git', gitlabSubProjectUrl);
  // fix: option key is originUrl (was misspelled 'originurl' and ignored
  // by stubSaveRequest's destructuring)
  stubSaveRequest({requestUrl: originSaveUrl,
                   saveRequestStatus: 'accepted',
                   originUrl: gitlabSubProjectUrl,
                   saveTaskStatus: 'not yet scheduled'});
  makeOriginSaveRequest('git', gitlabSubProjectUrl);
  cy.wait('@saveRequest').then(() => {
    checkAlertVisible('success', saveCodeMsg['success']);
  });
});
it('should validate project url with _ in username', function() {
  const gitlabSubProjectUrl = 'https://gitlab.com/user_name/project.git';
  const originSaveUrl = this.Urls.api_1_save_origin('git', gitlabSubProjectUrl);
  // fix: option key is originUrl (was misspelled 'originurl' and ignored
  // by stubSaveRequest's destructuring)
  stubSaveRequest({requestUrl: originSaveUrl,
                   saveRequestStatus: 'accepted',
                   originUrl: gitlabSubProjectUrl,
                   saveTaskStatus: 'not yet scheduled'});
  makeOriginSaveRequest('git', gitlabSubProjectUrl);
  cy.wait('@saveRequest').then(() => {
    checkAlertVisible('success', saveCodeMsg['success']);
  });
});
it('should display warning message when pending', function() {
stubSaveRequest({requestUrl: this.originSaveUrl,
saveRequestStatus: 'pending',
originUrl: origin.url,
saveTaskStatus: 'not created'});
makeOriginSaveRequest(origin.type, origin.url);
cy.wait('@saveRequest').then(() => {
checkAlertVisible('warning', saveCodeMsg['warning']);
});
});
it('should show error when the origin does not exist (status: 400)', function() {
stubSaveRequest({requestUrl: this.originSaveUrl,
originUrl: origin.url,
responseStatus: 400,
errorMessage: saveCodeMsg['not-found']});
makeOriginSaveRequest(origin.type, origin.url);
cy.wait('@saveRequest').then(() => {
checkAlertVisible('danger', saveCodeMsg['not-found']);
});
});
it('should show error when csrf validation failed (status: 403)', function() {
stubSaveRequest({requestUrl: this.originSaveUrl,
saveRequestStatus: 'rejected',
originUrl: origin.url,
saveTaskStatus: 'not created',
responseStatus: 403,
errorMessage: saveCodeMsg['csrfError']});
makeOriginSaveRequest(origin.type, origin.url);
cy.wait('@saveRequest').then(() => {
checkAlertVisible('danger', saveCodeMsg['csrfError']);
});
});
it('should show error when origin is rejected (status: 403)', function() {
stubSaveRequest({requestUrl: this.originSaveUrl,
saveRequestStatus: 'rejected',
originUrl: origin.url,
saveTaskStatus: 'not created',
responseStatus: 403,
errorMessage: saveCodeMsg['rejected']});
makeOriginSaveRequest(origin.type, origin.url);
cy.wait('@saveRequest').then(() => {
checkAlertVisible('danger', saveCodeMsg['rejected']);
});
});
it('should show error when rate limited (status: 429)', function() {
stubSaveRequest({requestUrl: this.originSaveUrl,
saveRequestStatus: 'Request was throttled. Expected available in 60 seconds.',
originUrl: origin.url,
saveTaskStatus: 'not created',
responseStatus: 429});
makeOriginSaveRequest(origin.type, origin.url);
cy.wait('@saveRequest').then(() => {
checkAlertVisible('danger', saveCodeMsg['rateLimit']);
});
});
it('should show error when unknown error occurs (status other than 200, 403, 429)', function() {
stubSaveRequest({requestUrl: this.originSaveUrl,
saveRequestStatus: 'Error',
originUrl: origin.url,
saveTaskStatus: 'not created',
responseStatus: 406});
makeOriginSaveRequest(origin.type, origin.url);
cy.wait('@saveRequest').then(() => {
checkAlertVisible('danger', saveCodeMsg['unknownError']);
});
});
it('should display origin save info in the requests table', function() {
cy.intercept('/save/requests/list/**', {fixture: 'origin-save'});
cy.get('#swh-origin-save-requests-list-tab').click();
cy.get('tbody tr').then(rows => {
let i = 0;
for (let row of rows) {
const cells = row.cells;
const requestDateStr = new Date(this.originSaveJSON.data[i].save_request_date).toLocaleString();
const saveStatus = this.originSaveJSON.data[i].save_task_status;
assert.equal($(cells[0]).text(), requestDateStr);
assert.equal($(cells[1]).text(), this.originSaveJSON.data[i].visit_type);
let html = '';
if (saveStatus === 'succeeded') {
let browseOriginUrl = `${this.Urls.browse_origin()}?origin_url=${encodeURIComponent(this.originSaveJSON.data[i].origin_url)}`;
browseOriginUrl += `×tamp=${encodeURIComponent(this.originSaveJSON.data[i].visit_date)}`;
html += `${this.originSaveJSON.data[i].origin_url}`;
} else {
html += this.originSaveJSON.data[i].origin_url;
}
html += ` `;
html += '';
assert.equal($(cells[2]).html(), html);
assert.equal($(cells[3]).text(), this.originSaveJSON.data[i].save_request_status);
assert.equal($(cells[4]).text(), saveStatus);
++i;
}
});
});
it('should not add timestamp to the browse origin URL is no visit date has been found', function() {
const originUrl = 'https://git.example.org/example.git';
const saveRequestData = genOriginSaveResponse({
saveRequestStatus: 'accepted',
originUrl: originUrl,
saveTaskStatus: 'succeeded',
visitDate: null,
visitStatus: 'full'
});
const saveRequestsListData = {
'recordsTotal': 1,
'draw': 2,
'recordsFiltered': 1,
'data': [saveRequestData]
};
cy.intercept('/save/requests/list/**', {body: saveRequestsListData})
.as('saveRequestsList');
cy.get('#swh-origin-save-requests-list-tab').click();
cy.wait('@saveRequestsList');
cy.get('tbody tr').then(rows => {
const firstRowCells = rows[0].cells;
const browseOriginUrl = `${this.Urls.browse_origin()}?origin_url=${encodeURIComponent(originUrl)}`;
const browseOriginLink = `${originUrl}`;
expect($(firstRowCells[2]).html()).to.have.string(browseOriginLink);
});
});
it('should display/close task info popover when clicking on the info button', function() {
cy.intercept('/save/requests/list/**', {fixture: 'origin-save'});
cy.intercept('/save/task/info/**', {fixture: 'save-task-info'});
cy.get('#swh-origin-save-requests-list-tab').click();
cy.get('.swh-save-request-info')
.eq(0)
.click();
cy.get('.swh-save-request-info-popover')
.should('be.visible');
cy.get('.swh-save-request-info')
.eq(0)
.click();
cy.get('.swh-save-request-info-popover')
.should('not.exist');
});
it('should hide task info popover when clicking on the close button', function() {
cy.intercept('/save/requests/list/**', {fixture: 'origin-save'});
cy.intercept('/save/task/info/**', {fixture: 'save-task-info'});
cy.get('#swh-origin-save-requests-list-tab').click();
cy.get('.swh-save-request-info')
.eq(0)
.click();
cy.get('.swh-save-request-info-popover')
.should('be.visible');
cy.get('.swh-save-request-info-close')
.click();
cy.get('.swh-save-request-info-popover')
.should('not.exist');
});
it('should fill save request form when clicking on "Save again" button', function() {
cy.intercept('/save/requests/list/**', {fixture: 'origin-save'});
cy.get('#swh-origin-save-requests-list-tab').click();
cy.get('.swh-save-origin-again')
.eq(0)
.click();
cy.get('tbody tr').eq(0).then(row => {
const cells = row[0].cells;
cy.get('#swh-input-visit-type')
.should('have.value', $(cells[1]).text());
cy.get('#swh-input-origin-url')
.should('have.value', $(cells[2]).text().slice(0, -1));
});
});
it('should select correct visit type if possible when clicking on "Save again" button', function() {
  const originUrl = 'https://gitlab.inria.fr/solverstack/maphys/maphys/';
  const badVisitType = 'hg';
  const goodVisitType = 'git';
  cy.intercept('/save/requests/list/**', {fixture: 'origin-save'});
  // fix: dropped errorMessage option — saveCodeMsg has no 'accepted' key so
  // it resolved to undefined, and it is unused anyway when responseStatus is 200
  stubSaveRequest({requestUrl: this.Urls.api_1_save_origin(badVisitType, originUrl),
                   visitType: badVisitType,
                   saveRequestStatus: 'accepted',
                   originUrl: originUrl,
                   saveTaskStatus: 'failed',
                   visitStatus: 'failed',
                   responseStatus: 200});
  makeOriginSaveRequest(badVisitType, originUrl);
  cy.get('#swh-origin-save-requests-list-tab').click();
  cy.wait('@saveRequest').then(() => {
    cy.get('.swh-save-origin-again')
      .eq(0)
      .click();
    cy.get('tbody tr').eq(0).then(row => {
      const cells = row[0].cells;
      cy.get('#swh-input-visit-type')
        .should('have.value', goodVisitType);
      cy.get('#swh-input-origin-url')
        .should('have.value', $(cells[2]).text().slice(0, -1));
    });
  });
});
it('should create save request for authenticated user', function() {
cy.userLogin();
cy.visit(url);
const originUrl = 'https://git.example.org/account/repo';
stubSaveRequest({requestUrl: this.Urls.api_1_save_origin('git', originUrl),
saveRequestStatus: 'accepted',
originUrl: origin.url,
saveTaskStatus: 'not yet scheduled'});
makeOriginSaveRequest('git', originUrl);
cy.wait('@saveRequest').then(() => {
checkAlertVisible('success', saveCodeMsg['success']);
});
});
it('should not show user requests filter checkbox for anonymous users', function() {
cy.get('#swh-origin-save-requests-list-tab').click();
cy.get('#swh-save-requests-user-filter').should('not.exist');
});
it('should show user requests filter checkbox for authenticated users', function() {
cy.userLogin();
cy.visit(url);
cy.get('#swh-origin-save-requests-list-tab').click();
cy.get('#swh-save-requests-user-filter').should('exist');
});
it('should show only user requests when filter is activated', function() {
cy.intercept('POST', '/api/1/origin/save/**')
.as('saveRequest');
const originAnonymousUser = 'https://some.git.server/project/';
const originAuthUser = 'https://other.git.server/project/';
// anonymous user creates a save request
makeOriginSaveRequest('git', originAnonymousUser);
cy.wait('@saveRequest');
// authenticated user creates another save request
cy.userLogin();
cy.visit(url);
makeOriginSaveRequest('git', originAuthUser);
cy.wait('@saveRequest');
// user requests filter checkbox should be in the DOM
cy.get('#swh-origin-save-requests-list-tab').click();
cy.get('#swh-save-requests-user-filter').should('exist');
// check unfiltered user requests
cy.get('tbody tr').then(rows => {
expect(rows.length).to.eq(2);
expect($(rows[0].cells[2]).text()).to.contain(originAuthUser);
expect($(rows[1].cells[2]).text()).to.contain(originAnonymousUser);
});
// activate filter and check filtered user requests
cy.get('#swh-save-requests-user-filter')
.click({force: true});
cy.get('tbody tr').then(rows => {
expect(rows.length).to.eq(1);
expect($(rows[0].cells[2]).text()).to.contain(originAuthUser);
});
// deactivate filter and check unfiltered user requests
cy.get('#swh-save-requests-user-filter')
.click({force: true});
cy.get('tbody tr').then(rows => {
expect(rows.length).to.eq(2);
});
});
+ it('should list unprivileged visit types when not connected', function() {
+ cy.visit(url);
+ cy.get('#swh-input-visit-type').children('option').then(options => {
+ const actual = [...options].map(o => o.value);
+ expect(actual).to.deep.eq(anonymousVisitTypes);
+ });
+ });
+
+ it('should list unprivileged visit types when connected as unprivileged user', function() {
+ cy.userLogin();
+ cy.visit(url);
+ cy.get('#swh-input-visit-type').children('option').then(options => {
+ const actual = [...options].map(o => o.value);
+ expect(actual).to.deep.eq(anonymousVisitTypes);
+ });
+ });
+
+ it('should list privileged visit types when connected as ambassador', function() {
+ cy.ambassadorLogin();
+ cy.visit(url);
+ cy.get('#swh-input-visit-type').children('option').then(options => {
+ const actual = [...options].map(o => o.value);
+ expect(actual).to.deep.eq(allVisitTypes);
+ });
+ });
+
+ it('should display extra inputs when dealing with bundle visit type', function() {
+ cy.ambassadorLogin();
+ cy.visit(url);
+
+ for (let visitType of anonymousVisitTypes) {
+ cy.get('#swh-input-visit-type').select(visitType);
+ cy.get('#optional-origin-forms').should('not.be.visible');
+ }
+
+ // bundle should display more inputs with the bundle type
+ cy.get('#swh-input-visit-type').select('bundle');
+ cy.get('#optional-origin-forms').should('be.visible');
+
+ });
+
+ it('should be allowed to submit bundle save request when connected as ambassador', function() {
+ let originUrl = 'https://ftp.gnu.org/pub/pub/gnu/3dldf';
+ let artifactUrl = 'https://ftp.gnu.org/pub/pub/gnu/3dldf/3DLDF-1.1.4.tar.gz';
+ let artifactFilename = '3DLDF-1.1.4.tar.gz';
+ let artifactVersion = '1.1.4';
+ stubSaveRequest({
+ requestUrl: this.Urls.api_1_save_origin('bundle', originUrl),
+ saveRequestStatus: 'accepted',
+ originUrl: originUrl,
+ saveTaskStatus: 'not yet scheduled'
+ });
+
+ cy.ambassadorLogin();
+ cy.visit(url);
+
+ // input new bundle information and submit
+ cy.get('#swh-input-origin-url')
+ .type(originUrl)
+ .get('#swh-input-visit-type')
+ .select('bundle')
+ .get('#swh-input-artifact-url')
+ .type(artifactUrl)
+ .get('#swh-input-artifact-filename')
+ .type(artifactFilename)
+ .get('#swh-input-artifact-version')
+ .type(artifactVersion)
+ .get('#swh-save-origin-form')
+ .submit();
+
+ cy.wait('@saveRequest').then(() => {
+ checkAlertVisible('success', saveCodeMsg['success']);
+ });
+
+ });
+
});
diff --git a/cypress/support/index.js b/cypress/support/index.js
index b5652aef..03acb96c 100644
--- a/cypress/support/index.js
+++ b/cypress/support/index.js
@@ -1,153 +1,157 @@
/**
* Copyright (C) 2019-2020 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
import '@cypress/code-coverage/support';
import {httpGetJson} from '../utils';
Cypress.Screenshot.defaults({
screenshotOnRunFailure: false
});
Cypress.Commands.add('xhrShouldBeCalled', (alias, timesCalled) => {
const testRoutes = cy.state('routes');
const aliasRoute = Cypress._.find(testRoutes, {alias});
expect(Object.keys(aliasRoute.requests || {})).to.have.length(timesCalled);
});
// Log a user in through the Django admin login form: a first GET fetches
// the CSRF token cookie, then the credentials are POSTed along with it.
// Resolves with the value of the csrftoken cookie of the new session.
function loginUser(username, password) {
  const loginUrl = '/admin/login/';
  return cy.request({url: loginUrl, method: 'GET'}).then(() => {
    // no session should exist before authenticating
    cy.getCookie('sessionid').should('not.exist');
    cy.getCookie('csrftoken').its('value').then((token) => {
      cy.request({
        url: loginUrl,
        method: 'POST',
        form: true,
        followRedirect: false,
        body: {
          username: username,
          password: password,
          csrfmiddlewaretoken: token
        }
      }).then(() => {
        // the login must have created a session
        cy.getCookie('sessionid').should('exist');
        return cy.getCookie('csrftoken').its('value');
      });
    });
  });
}
Cypress.Commands.add('adminLogin', () => {
return loginUser('admin', 'admin');
});
Cypress.Commands.add('userLogin', () => {
return loginUser('user', 'user');
});
+Cypress.Commands.add('ambassadorLogin', () => {
+ return loginUser('ambassador', 'ambassador');
+});
+
before(function() {
this.unarchivedRepo = {
url: 'https://github.com/SoftwareHeritage/swh-web',
type: 'git',
revision: '7bf1b2f489f16253527807baead7957ca9e8adde',
snapshot: 'd9829223095de4bb529790de8ba4e4813e38672d',
rootDirectory: '7d887d96c0047a77e2e8c4ee9bb1528463677663',
content: [{
sha1git: 'b203ec39300e5b7e97b6e20986183cbd0b797859'
}]
};
this.origin = [{
url: 'https://github.com/memononen/libtess2',
type: 'git',
content: [{
path: 'Source/tess.h'
}, {
path: 'premake4.lua'
}],
directory: [{
path: 'Source',
id: 'cd19126d815470b28919d64b2a8e6a3e37f900dd'
}],
revisions: [],
invalidSubDir: 'Source1'
}, {
url: 'https://github.com/wcoder/highlightjs-line-numbers.js',
type: 'git',
content: [{
path: 'src/highlightjs-line-numbers.js'
}],
directory: [],
revisions: ['1c480a4573d2a003fc2630c21c2b25829de49972'],
release: {
name: 'v2.6.0',
id: '6877028d6e5412780517d0bfa81f07f6c51abb41',
directory: '5b61d50ef35ca9a4618a3572bde947b8cccf71ad'
}
}];
const getMetadataForOrigin = async originUrl => {
const originVisitsApiUrl = this.Urls.api_1_origin_visits(originUrl);
const originVisits = await httpGetJson(originVisitsApiUrl);
const lastVisit = originVisits[0];
const snapshotApiUrl = this.Urls.api_1_snapshot(lastVisit.snapshot);
const lastOriginSnapshot = await httpGetJson(snapshotApiUrl);
let revision = lastOriginSnapshot.branches.HEAD.target;
if (lastOriginSnapshot.branches.HEAD.target_type === 'alias') {
revision = lastOriginSnapshot.branches[revision].target;
}
const revisionApiUrl = this.Urls.api_1_revision(revision);
const lastOriginHeadRevision = await httpGetJson(revisionApiUrl);
return {
'directory': lastOriginHeadRevision.directory,
'revision': lastOriginHeadRevision.id,
'snapshot': lastOriginSnapshot.id
};
};
cy.visit('/').window().then(async win => {
this.Urls = win.Urls;
for (let origin of this.origin) {
const metadata = await getMetadataForOrigin(origin.url);
const directoryApiUrl = this.Urls.api_1_directory(metadata.directory);
origin.dirContent = await httpGetJson(directoryApiUrl);
origin.rootDirectory = metadata.directory;
origin.revisions.push(metadata.revision);
origin.snapshot = metadata.snapshot;
for (let content of origin.content) {
const contentPathApiUrl = this.Urls.api_1_directory(origin.rootDirectory, content.path);
const contentMetaData = await httpGetJson(contentPathApiUrl);
content.name = contentMetaData.name.split('/').slice(-1)[0];
content.sha1git = contentMetaData.target;
content.directory = contentMetaData.dir_id;
content.rawFilePath = this.Urls.browse_content_raw(`sha1_git:${content.sha1git}`) +
`?filename=${encodeURIComponent(content.name)}`;
cy.request(content.rawFilePath)
.then((response) => {
const fileText = response.body;
const fileLines = fileText.split('\n');
content.numberLines = fileLines.length;
// If last line is empty its not shown
if (!fileLines[content.numberLines - 1]) content.numberLines -= 1;
});
}
}
});
});
diff --git a/swh/web/api/views/origin_save.py b/swh/web/api/views/origin_save.py
index 6dd5201c..b030a319 100644
--- a/swh/web/api/views/origin_save.py
+++ b/swh/web/api/views/origin_save.py
@@ -1,97 +1,101 @@
# Copyright (C) 2018-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.web.api.apidoc import api_doc, format_docstring
from swh.web.api.apiurls import api_route
from swh.web.auth.utils import privileged_user
from swh.web.common.origin_save import (
create_save_origin_request,
get_save_origin_requests,
)
@api_route(
r"/origin/save/(?P.+)/url/(?P.+)/",
"api-1-save-origin",
methods=["GET", "POST"],
throttle_scope="swh_save_origin",
never_cache=True,
)
@api_doc("/origin/save/")
@format_docstring()
def api_save_origin(request, visit_type, origin_url):
"""
.. http:get:: /api/1/origin/save/(visit_type)/url/(origin_url)/
.. http:post:: /api/1/origin/save/(visit_type)/url/(origin_url)/
Request the saving of a software origin into the archive
or check the status of previously created save requests.
That endpoint enables to create a saving task for a software origin
through a POST request.
Depending of the provided origin url, the save request can either be:
* immediately **accepted**, for well known code hosting providers
like for instance GitHub or GitLab
* **rejected**, in case the url is blacklisted by Software Heritage
* **put in pending state** until a manual check is done in order to
determine if it can be loaded or not
Once a saving request has been accepted, its associated saving task
status can then be checked through a GET request on the same url.
Returned status can either be:
* **not created**: no saving task has been created
* **not yet scheduled**: saving task has been created but its
execution has not yet been scheduled
* **scheduled**: the task execution has been scheduled
* **succeeded**: the saving task has been successfully executed
* **failed**: the saving task has been executed but it failed
When issuing a POST request an object will be returned while a GET
request will return an array of objects (as multiple save requests
might have been submitted for the same origin).
:param string visit_type: the type of visit to perform
(currently the supported types are ``git``, ``hg`` and ``svn``)
:param string origin_url: the url of the origin to save
{common_headers}
:>json string origin_url: the url of the origin to save
:>json string visit_type: the type of visit to perform
:>json string save_request_date: the date (in iso format) the save
request was issued
:>json string save_request_status: the status of the save request,
either **accepted**, **rejected** or **pending**
:>json string save_task_status: the status of the origin saving task,
either **not created**, **not yet scheduled**, **scheduled**,
**succeeded** or **failed**
:>json string visit_date: the date (in iso format) of the visit if a visit
occurred, null otherwise.
:>json string visit_status: the status of the visit, either **full**,
**partial**, **not_found** or **failed** if a visit occurred, null
otherwise.
:statuscode 200: no error
:statuscode 400: an invalid visit type or origin url has been provided
:statuscode 403: the provided origin url is blacklisted
:statuscode 404: no save requests have been found for a given origin
"""
+ data = request.data or {}
if request.method == "POST":
sor = create_save_origin_request(
- visit_type, origin_url, privileged_user(request), user_id=request.user.id
+ visit_type,
+ origin_url,
+ privileged_user(request),
+ user_id=request.user.id,
+ **data,
)
-
del sor["id"]
else:
sor = get_save_origin_requests(visit_type, origin_url)
for s in sor:
del s["id"]
return sor
diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py
index aa352ae3..cc95c18b 100644
--- a/swh/web/common/origin_save.py
+++ b/swh/web/common/origin_save.py
@@ -1,833 +1,887 @@
# Copyright (C) 2018-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from bisect import bisect_right
from datetime import datetime, timedelta, timezone
from itertools import product
import json
import logging
from typing import Any, Dict, List, Optional, Tuple
from prometheus_client import Gauge
import requests
import sentry_sdk
from django.core.exceptions import ObjectDoesNotExist, ValidationError
from django.core.validators import URLValidator
from django.db.models import QuerySet
from django.utils.html import escape
from swh.scheduler.utils import create_oneshot_task_dict
from swh.web import config
from swh.web.common import archive
from swh.web.common.exc import BadInputExc, ForbiddenExc, NotFoundExc
from swh.web.common.models import (
SAVE_REQUEST_ACCEPTED,
SAVE_REQUEST_PENDING,
SAVE_REQUEST_REJECTED,
SAVE_TASK_FAILED,
SAVE_TASK_NOT_CREATED,
SAVE_TASK_NOT_YET_SCHEDULED,
SAVE_TASK_RUNNING,
SAVE_TASK_SCHEDULED,
SAVE_TASK_SUCCEEDED,
SaveAuthorizedOrigin,
SaveOriginRequest,
SaveUnauthorizedOrigin,
)
from swh.web.common.origin_visits import get_origin_visits
from swh.web.common.typing import (
OriginExistenceCheckInfo,
OriginInfo,
SaveOriginRequestInfo,
)
from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY, parse_iso8601_date_to_utc
scheduler = config.scheduler()
logger = logging.getLogger(__name__)
def get_origin_save_authorized_urls() -> List[str]:
    """
    Get the list of origin url prefixes authorized to be
    immediately loaded into the archive (whitelist).

    Returns:
        list: The list of authorized origin url prefix
    """
    return [authorized.url for authorized in SaveAuthorizedOrigin.objects.all()]
def get_origin_save_unauthorized_urls() -> List[str]:
    """
    Get the list of origin url prefixes forbidden to be
    loaded into the archive (blacklist).

    Returns:
        list: the list of unauthorized origin url prefix
    """
    return [forbidden.url for forbidden in SaveUnauthorizedOrigin.objects.all()]
def can_save_origin(origin_url: str, bypass_pending_review: bool = False) -> str:
    """
    Check if a software origin can be saved into the archive.

    Based on the origin url, the save request will be either:

      * immediately accepted if the url is whitelisted
      * rejected if the url is blacklisted
      * put in pending state for manual review otherwise

    Args:
        origin_url (str): the software origin url to check

    Returns:
        str: the origin save request status, either **accepted**,
        **rejected** or **pending**
    """
    # origin url may be blacklisted
    if any(origin_url.startswith(prefix) for prefix in get_origin_save_unauthorized_urls()):
        return SAVE_REQUEST_REJECTED
    # if the origin url is in the white list, it can be immediately saved
    if any(origin_url.startswith(prefix) for prefix in get_origin_save_authorized_urls()):
        return SAVE_REQUEST_ACCEPTED
    # otherwise, the origin url needs to be manually verified if the user
    # that submitted it does not have special permission
    if not bypass_pending_review:
        return SAVE_REQUEST_PENDING
    # mark the origin URL as trusted in that case
    SaveAuthorizedOrigin.objects.get_or_create(url=origin_url)
    return SAVE_REQUEST_ACCEPTED
# map visit type to scheduler task
# TODO: do not hardcode the task name here (T1157)
_visit_type_task = {"git": "load-git", "hg": "load-hg", "svn": "load-svn"}
_visit_type_task_privileged = {
"bundle": "load-archive-files",
}
# map scheduler task status to origin save status
_save_task_status = {
"next_run_not_scheduled": SAVE_TASK_NOT_YET_SCHEDULED,
"next_run_scheduled": SAVE_TASK_SCHEDULED,
"completed": SAVE_TASK_SUCCEEDED,
"disabled": SAVE_TASK_FAILED,
}
# map scheduler task_run status to origin save status
_save_task_run_status = {
"scheduled": SAVE_TASK_SCHEDULED,
"started": SAVE_TASK_RUNNING,
"eventful": SAVE_TASK_SUCCEEDED,
"uneventful": SAVE_TASK_SUCCEEDED,
"failed": SAVE_TASK_FAILED,
"permfailed": SAVE_TASK_FAILED,
"lost": SAVE_TASK_FAILED,
}
+def get_savable_visit_types_dict(privileged_user: bool = False) -> Dict:
+ """Returned the supported task types the user has access to.
+
+ Args:
+ privileged_user: Flag to determine if all visit types should be returned or not.
+ Default to False to only list unprivileged visit types.
+
+ Returns:
+ the dict of supported visit types for the user
+
+ """
+ if privileged_user:
+ task_types = {**_visit_type_task, **_visit_type_task_privileged}
+ else:
+ task_types = _visit_type_task
+
+ return task_types
+
+
def get_savable_visit_types(privileged_user: bool = False) -> List[str]:
- """Get the list of visit types that can be performed through a save request.
+ """Return the list of visit types the user can perform save requests on.
Args:
privileged_user: Flag to determine if all visit types should be returned or not.
Default to False to only list unprivileged visit types.
Returns:
the list of saveable visit types
"""
- task_types = list(_visit_type_task.keys())
- if privileged_user:
- task_types += _visit_type_task_privileged.keys()
- return sorted(task_types)
+ return sorted(list(get_savable_visit_types_dict(privileged_user).keys()))
def _check_visit_type_savable(visit_type: str, privileged_user: bool = False) -> None:
    """Ensure ``visit_type`` is one the user may save, raising otherwise.

    Args:
        visit_type: the type of visit requested
        privileged_user: whether privileged visit types are also allowed

    Raises:
        BadInputExc: if the visit type is not savable for this user
    """
    allowed = get_savable_visit_types(privileged_user)
    if visit_type in allowed:
        return
    raise BadInputExc(
        f"Visit of type {visit_type} can not be saved! "
        f"Allowed types are the following: {', '.join(allowed)}"
    )
_validate_url = URLValidator(schemes=["http", "https", "svn", "git"])
def _check_origin_url_valid(origin_url: str) -> None:
try:
_validate_url(origin_url)
except ValidationError:
raise BadInputExc(
"The provided origin url (%s) is not valid!" % escape(origin_url)
)
def origin_exists(origin_url: str) -> OriginExistenceCheckInfo:
"""Check the origin url for existence. If it exists, extract some more useful
information on the origin.
"""
resp = requests.head(origin_url)
exists = resp.ok
content_length: Optional[int] = None
last_modified: Optional[str] = None
if exists:
size_ = resp.headers.get("Content-Length")
content_length = int(size_) if size_ else None
- last_modified = resp.headers.get("Last-Modified")
+ try:
+ date_str = resp.headers["Last-Modified"]
+ date = datetime.strptime(date_str, "%a, %d %b %Y %H:%M:%S %Z")
+ last_modified = date.isoformat()
+ except (KeyError, ValueError):
+ # if not provided or not parsable as per the expected format, keep it None
+ pass
return OriginExistenceCheckInfo(
origin_url=origin_url,
exists=exists,
last_modified=last_modified,
content_length=content_length,
)
-def _check_origin_exists(origin_url: str) -> None:
+def _check_origin_exists(origin_url: Optional[str]) -> OriginExistenceCheckInfo:
"""Ensure the origin exists, if not raise an explicit message."""
- check = origin_exists(origin_url)
- if not check["exists"]:
+ if not origin_url:
+ raise BadInputExc("The origin url provided must be set!")
+ metadata = origin_exists(origin_url)
+ if not metadata["exists"]:
raise BadInputExc(
f"The provided origin url ({escape(origin_url)}) does not exist!"
)
+ return metadata
+
def _get_visit_info_for_save_request(
save_request: SaveOriginRequest,
) -> Tuple[Optional[datetime], Optional[str]]:
"""Retrieve visit information out of a save request
Args:
save_request: Input save origin request to retrieve information for.
Returns:
Tuple of (visit date, optional visit status) for such save request origin
"""
visit_date = None
visit_status = None
time_now = datetime.now(tz=timezone.utc)
time_delta = time_now - save_request.request_date
# stop trying to find a visit date one month after save request submission
# as those requests to storage are expensive and associated loading task
# surely ended up with errors
if time_delta.days <= 30:
try:
origin_info = archive.lookup_origin(OriginInfo(url=save_request.origin_url))
origin_visits = get_origin_visits(origin_info)
visit_dates = [parse_iso8601_date_to_utc(v["date"]) for v in origin_visits]
i = bisect_right(visit_dates, save_request.request_date)
if i != len(visit_dates):
visit_date = visit_dates[i]
visit_status = origin_visits[i]["status"]
if visit_status not in ("full", "partial", "not_found"):
visit_date = None
except Exception as exc:
sentry_sdk.capture_exception(exc)
return visit_date, visit_status
def _check_visit_update_status(
save_request: SaveOriginRequest, save_task_status: str
) -> Tuple[Optional[datetime], str]:
"""Given a save request and a save task status, determine whether a save request was
successful or failed.
Args:
save_request: Input save origin request to retrieve information for.
Returns:
Tuple of (optional visit date, save task status) for such save request origin
"""
visit_date, visit_status = _get_visit_info_for_save_request(save_request)
save_request.visit_date = visit_date
save_request.visit_status = visit_status
if visit_date and visit_status in ("full", "partial"):
# visit has been performed, mark the saving task as succeeded
save_task_status = SAVE_TASK_SUCCEEDED
elif visit_status in ("created", "ongoing"):
# visit is currently running
save_task_status = SAVE_TASK_RUNNING
elif visit_status in ("not_found", "failed"):
save_task_status = SAVE_TASK_FAILED
else:
time_now = datetime.now(tz=timezone.utc)
time_delta = time_now - save_request.request_date
# consider the task as failed if it is still in scheduled state
# 30 days after its submission
if time_delta.days > 30:
save_task_status = SAVE_TASK_FAILED
return visit_date, save_task_status
def _update_save_request_info(
save_request: SaveOriginRequest,
task: Optional[Dict[str, Any]] = None,
task_run: Optional[Dict[str, Any]] = None,
) -> SaveOriginRequestInfo:
"""Update save request information out of task and task_run information.
Args:
save_request: Save request
task: Associated scheduler task information about the save request
task_run: Most recent run occurrence of the associated task
Returns:
Summary of the save request information updated.
"""
must_save = False
visit_date = save_request.visit_date
# save task still in scheduler db
if task:
save_task_status = _save_task_status[task["status"]]
if task_run:
save_task_status = _save_task_run_status[task_run["status"]]
# Consider request from which a visit date has already been found
# as succeeded to avoid retrieving it again
if save_task_status == SAVE_TASK_SCHEDULED and visit_date:
save_task_status = SAVE_TASK_SUCCEEDED
if (
save_task_status in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED)
and not visit_date
):
visit_date, visit_status = _get_visit_info_for_save_request(save_request)
save_request.visit_date = visit_date
save_request.visit_status = visit_status
if visit_status in ("failed", "not_found"):
save_task_status = SAVE_TASK_FAILED
must_save = True
# Check tasks still marked as scheduled / not yet scheduled
if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED):
visit_date, save_task_status = _check_visit_update_status(
save_request, save_task_status
)
# save task may have been archived
else:
save_task_status = save_request.loading_task_status
if save_task_status in (SAVE_TASK_SCHEDULED, SAVE_TASK_NOT_YET_SCHEDULED):
visit_date, save_task_status = _check_visit_update_status(
save_request, save_task_status
)
else:
save_task_status = save_request.loading_task_status
if (
# avoid to override final loading task status when already found
# as visit status is no longer checked once a visit date has been found
save_request.loading_task_status not in (SAVE_TASK_FAILED, SAVE_TASK_SUCCEEDED)
and save_request.loading_task_status != save_task_status
):
save_request.loading_task_status = save_task_status
must_save = True
if must_save:
save_request.save()
return save_request.to_dict()
def create_save_origin_request(
visit_type: str,
origin_url: str,
privileged_user: bool = False,
user_id: Optional[int] = None,
+ **kwargs,
) -> SaveOriginRequestInfo:
"""Create a loading task to save a software origin into the archive.
- This function aims to create a software origin loading task
- trough the use of the swh-scheduler component.
+ This function aims to create a software origin loading task trough the use of the
+ swh-scheduler component.
- First, some checks are performed to see if the visit type and origin
- url are valid but also if the the save request can be accepted.
- If those checks passed, the loading task is then created.
- Otherwise, the save request is put in pending or rejected state.
+ First, some checks are performed to see if the visit type and origin url are valid
+ but also if the the save request can be accepted. For the 'bundle' visit type, this
+ also ensures the artifacts actually exists. If those checks passed, the loading task
+ is then created. Otherwise, the save request is put in pending or rejected state.
- All the submitted save requests are logged into the swh-web
- database to keep track of them.
+ All the submitted save requests are logged into the swh-web database to keep track
+ of them.
Args:
- visit_type: the type of visit to perform (e.g git, hg, svn, ...)
+ visit_type: the type of visit to perform (e.g. git, hg, svn, bundle, ...)
origin_url: the url of the origin to save
- privileged_user: Whether the user has privileged_user access to extra
- functionality (e.g. bypass save code now review, access to extra visit type)
+ privileged: Whether the user has some more privilege than other (bypass
+ review, access to privileged other visit types)
user_id: User identifier (provided when authenticated)
+ kwargs: Optional parameters (e.g. artifact_url, artifact_filename,
+ artifact_version)
Raises:
BadInputExc: the visit type or origin url is invalid or inexistent
ForbiddenExc: the provided origin url is blacklisted
Returns:
dict: A dict describing the save request with the following keys:
* **visit_type**: the type of visit to perform
* **origin_url**: the url of the origin
* **save_request_date**: the date the request was submitted
* **save_request_status**: the request status, either **accepted**,
**rejected** or **pending**
* **save_task_status**: the origin loading task status, either
**not created**, **not yet scheduled**, **scheduled**,
**succeed** or **failed**
"""
+ visit_type_tasks = get_savable_visit_types_dict(privileged_user)
_check_visit_type_savable(visit_type, privileged_user)
_check_origin_url_valid(origin_url)
+
+ artifact_url = kwargs.get("artifact_url")
+ if visit_type == "bundle":
+ metadata = _check_origin_exists(artifact_url)
+
# if all checks passed so far, we can try and save the origin
save_request_status = can_save_origin(origin_url, privileged_user)
task = None
# if the origin save request is accepted, create a scheduler
# task to load it into the archive
if save_request_status == SAVE_REQUEST_ACCEPTED:
# create a task with high priority
- kwargs = {
+ task_kwargs: Dict[str, Any] = {
"priority": "high",
"url": origin_url,
}
+ if visit_type == "bundle":
+ # extra arguments for that type are required
+ assert metadata is not None
+ task_kwargs = dict(
+ **task_kwargs,
+ artifacts=[
+ {
+ "url": artifact_url,
+ "filename": kwargs["artifact_filename"],
+ "version": kwargs["artifact_version"],
+ "time": metadata["last_modified"],
+ "length": metadata["content_length"],
+ }
+ ],
+ )
sor = None
# get list of previously sumitted save requests
current_sors = list(
SaveOriginRequest.objects.filter(
visit_type=visit_type, origin_url=origin_url
)
)
can_create_task = False
# if no save requests previously submitted, create the scheduler task
if not current_sors:
can_create_task = True
else:
# get the latest submitted save request
sor = current_sors[0]
# if it was in pending state, we need to create the scheduler task
# and update the save request info in the database
if sor.status == SAVE_REQUEST_PENDING:
can_create_task = True
# a task has already been created to load the origin
elif sor.loading_task_id != -1:
# get the scheduler task and its status
tasks = scheduler.get_tasks([sor.loading_task_id])
task = tasks[0] if tasks else None
task_runs = scheduler.get_task_runs([sor.loading_task_id])
task_run = task_runs[0] if task_runs else None
save_request_info = _update_save_request_info(sor, task, task_run)
task_status = save_request_info["save_task_status"]
# create a new scheduler task only if the previous one has been
# already executed
if (
task_status == SAVE_TASK_FAILED
or task_status == SAVE_TASK_SUCCEEDED
):
can_create_task = True
sor = None
else:
can_create_task = False
if can_create_task:
# effectively create the scheduler task
- task_dict = create_oneshot_task_dict(_visit_type_task[visit_type], **kwargs)
+ task_dict = create_oneshot_task_dict(
+ visit_type_tasks[visit_type], **task_kwargs
+ )
+
task = scheduler.create_tasks([task_dict])[0]
# pending save request has been accepted
if sor:
sor.status = SAVE_REQUEST_ACCEPTED
sor.loading_task_id = task["id"]
sor.save()
else:
sor = SaveOriginRequest.objects.create(
visit_type=visit_type,
origin_url=origin_url,
status=save_request_status,
loading_task_id=task["id"],
user_ids=f'"{user_id}"' if user_id else None,
)
+
# save request must be manually reviewed for acceptation
elif save_request_status == SAVE_REQUEST_PENDING:
# check if there is already such a save request already submitted,
# no need to add it to the database in that case
try:
sor = SaveOriginRequest.objects.get(
visit_type=visit_type, origin_url=origin_url, status=save_request_status
)
user_ids = sor.user_ids if sor.user_ids is not None else ""
if user_id is not None and f'"{user_id}"' not in user_ids:
# update user ids list
sor.user_ids = f'{sor.user_ids},"{user_id}"'
sor.save()
# if not add it to the database
except ObjectDoesNotExist:
sor = SaveOriginRequest.objects.create(
visit_type=visit_type,
origin_url=origin_url,
status=save_request_status,
user_ids=f'"{user_id}"' if user_id else None,
)
# origin can not be saved as its url is blacklisted,
# log the request to the database anyway
else:
sor = SaveOriginRequest.objects.create(
visit_type=visit_type,
origin_url=origin_url,
status=save_request_status,
user_ids=f'"{user_id}"' if user_id else None,
)
if save_request_status == SAVE_REQUEST_REJECTED:
raise ForbiddenExc(
(
'The "save code now" request has been rejected '
"because the provided origin url is blacklisted."
)
)
assert sor is not None
return _update_save_request_info(sor, task)
def update_save_origin_requests_from_queryset(
requests_queryset: QuerySet,
) -> List[SaveOriginRequestInfo]:
"""Update all save requests from a SaveOriginRequest queryset, update their status in db
and return the list of impacted save_requests.
Args:
requests_queryset: input SaveOriginRequest queryset
Returns:
list: A list of save origin request info dicts as described in
:func:`swh.web.common.origin_save.create_save_origin_request`
"""
task_ids = []
for sor in requests_queryset:
task_ids.append(sor.loading_task_id)
save_requests = []
if task_ids:
tasks = scheduler.get_tasks(task_ids)
tasks = {task["id"]: task for task in tasks}
task_runs = scheduler.get_task_runs(tasks)
task_runs = {task_run["task"]: task_run for task_run in task_runs}
for sor in requests_queryset:
sr_dict = _update_save_request_info(
sor, tasks.get(sor.loading_task_id), task_runs.get(sor.loading_task_id),
)
save_requests.append(sr_dict)
return save_requests
def refresh_save_origin_request_statuses() -> List[SaveOriginRequestInfo]:
"""Refresh non-terminal save origin requests (SOR) in the backend.
Non-terminal SOR are requests whose status is **accepted** and their task status are
either **created**, **not yet scheduled**, **scheduled** or **running**.
This shall compute this list of SOR, checks their status in the scheduler and
optionally elasticsearch for their current status. Then update those in db.
Finally, this returns the refreshed information on those SOR.
"""
non_terminal_statuses = (
SAVE_TASK_NOT_CREATED,
SAVE_TASK_NOT_YET_SCHEDULED,
SAVE_TASK_RUNNING,
SAVE_TASK_SCHEDULED,
)
save_requests = SaveOriginRequest.objects.filter(
status=SAVE_REQUEST_ACCEPTED, loading_task_status__in=non_terminal_statuses
)
# update save request statuses
return (
update_save_origin_requests_from_queryset(save_requests)
if save_requests.count() > 0
else []
)
def get_save_origin_requests(
visit_type: str, origin_url: str
) -> List[SaveOriginRequestInfo]:
"""
Get all save requests for a given software origin.
Args:
visit_type: the type of visit
origin_url: the url of the origin
Raises:
BadInputExc: the visit type or origin url is invalid
swh.web.common.exc.NotFoundExc: no save requests can be found for the
given origin
Returns:
list: A list of save origin requests dict as described in
:func:`swh.web.common.origin_save.create_save_origin_request`
"""
_check_visit_type_savable(visit_type)
_check_origin_url_valid(origin_url)
sors = SaveOriginRequest.objects.filter(
visit_type=visit_type, origin_url=origin_url
)
if sors.count() == 0:
raise NotFoundExc(
f"No save requests found for visit of type {visit_type} "
f"on origin with url {origin_url}."
)
return update_save_origin_requests_from_queryset(sors)
def get_save_origin_task_info(
save_request_id: int, full_info: bool = True
) -> Dict[str, Any]:
"""
Get detailed information about an accepted save origin request
and its associated loading task.
If the associated loading task info is archived and removed
from the scheduler database, returns an empty dictionary.
Args:
save_request_id: identifier of a save origin request
full_info: whether to return detailed info for staff users
Returns:
A dictionary with the following keys:
- **type**: loading task type
- **arguments**: loading task arguments
- **id**: loading task database identifier
- **backend_id**: loading task celery identifier
- **scheduled**: loading task scheduling date
- **ended**: loading task termination date
- **status**: loading task execution status
- **visit_status**: Actual visit status
Depending on the availability of the task logs in the elasticsearch
cluster of Software Heritage, the returned dictionary may also
contain the following keys:
- **name**: associated celery task name
- **message**: relevant log message from task execution
- **duration**: task execution time (only if it succeeded)
- **worker**: name of the worker that executed the task
"""
try:
save_request = SaveOriginRequest.objects.get(id=save_request_id)
except ObjectDoesNotExist:
return {}
task = scheduler.get_tasks([save_request.loading_task_id])
task = task[0] if task else None
if task is None:
return {}
task_run = scheduler.get_task_runs([task["id"]])
task_run = task_run[0] if task_run else None
if task_run is None:
return {}
task_run["type"] = task["type"]
task_run["arguments"] = task["arguments"]
task_run["id"] = task_run["task"]
del task_run["task"]
del task_run["metadata"]
# Enrich the task run with the loading visit status
task_run["visit_status"] = save_request.visit_status
es_workers_index_url = config.get_config()["es_workers_index_url"]
if not es_workers_index_url:
return task_run
es_workers_index_url += "/_search"
if save_request.visit_date:
min_ts = save_request.visit_date
max_ts = min_ts + timedelta(days=7)
else:
min_ts = save_request.request_date
max_ts = min_ts + timedelta(days=30)
min_ts_unix = int(min_ts.timestamp()) * 1000
max_ts_unix = int(max_ts.timestamp()) * 1000
save_task_status = _save_task_status[task["status"]]
priority = "3" if save_task_status == SAVE_TASK_FAILED else "6"
query = {
"bool": {
"must": [
{"match_phrase": {"priority": {"query": priority}}},
{"match_phrase": {"swh_task_id": {"query": task_run["backend_id"]}}},
{
"range": {
"@timestamp": {
"gte": min_ts_unix,
"lte": max_ts_unix,
"format": "epoch_millis",
}
}
},
]
}
}
try:
response = requests.post(
es_workers_index_url,
json={"query": query, "sort": ["@timestamp"]},
timeout=30,
)
results = json.loads(response.text)
if results["hits"]["total"]["value"] >= 1:
task_run_info = results["hits"]["hits"][-1]["_source"]
if "swh_logging_args_runtime" in task_run_info:
duration = task_run_info["swh_logging_args_runtime"]
task_run["duration"] = duration
if "message" in task_run_info:
task_run["message"] = task_run_info["message"]
if "swh_logging_args_name" in task_run_info:
task_run["name"] = task_run_info["swh_logging_args_name"]
elif "swh_task_name" in task_run_info:
task_run["name"] = task_run_info["swh_task_name"]
if "hostname" in task_run_info:
task_run["worker"] = task_run_info["hostname"]
elif "host" in task_run_info:
task_run["worker"] = task_run_info["host"]
except Exception as exc:
logger.warning("Request to Elasticsearch failed\n%s", exc)
sentry_sdk.capture_exception(exc)
if not full_info:
for field in ("id", "backend_id", "worker"):
# remove some staff only fields
task_run.pop(field, None)
if "message" in task_run and "Loading failure" in task_run["message"]:
# hide traceback for non staff users, only display exception
message_lines = task_run["message"].split("\n")
message = ""
for line in message_lines:
if line.startswith("Traceback"):
break
message += f"{line}\n"
message += message_lines[-1]
task_run["message"] = message
return task_run
SUBMITTED_SAVE_REQUESTS_METRIC = "swh_web_submitted_save_requests"
_submitted_save_requests_gauge = Gauge(
name=SUBMITTED_SAVE_REQUESTS_METRIC,
documentation="Number of submitted origin save requests",
labelnames=["status", "visit_type"],
registry=SWH_WEB_METRICS_REGISTRY,
)
ACCEPTED_SAVE_REQUESTS_METRIC = "swh_web_accepted_save_requests"
_accepted_save_requests_gauge = Gauge(
name=ACCEPTED_SAVE_REQUESTS_METRIC,
documentation="Number of accepted origin save requests",
labelnames=["load_task_status", "visit_type"],
registry=SWH_WEB_METRICS_REGISTRY,
)
# Metric on the delay of save code now request per status and visit_type. This is the
# time difference between the save code now is requested and the time it got ingested.
ACCEPTED_SAVE_REQUESTS_DELAY_METRIC = "swh_web_save_requests_delay_seconds"
_accepted_save_requests_delay_gauge = Gauge(
name=ACCEPTED_SAVE_REQUESTS_DELAY_METRIC,
documentation="Save Requests Duration",
labelnames=["load_task_status", "visit_type"],
registry=SWH_WEB_METRICS_REGISTRY,
)
def compute_save_requests_metrics() -> None:
"""Compute Prometheus metrics related to origin save requests:
- Number of submitted origin save requests
- Number of accepted origin save requests
- Save Code Now requests delay between request time and actual time of ingestion
"""
request_statuses = (
SAVE_REQUEST_ACCEPTED,
SAVE_REQUEST_REJECTED,
SAVE_REQUEST_PENDING,
)
load_task_statuses = (
SAVE_TASK_NOT_CREATED,
SAVE_TASK_NOT_YET_SCHEDULED,
SAVE_TASK_SCHEDULED,
SAVE_TASK_SUCCEEDED,
SAVE_TASK_FAILED,
SAVE_TASK_RUNNING,
)
# for metrics, we want access to all visit types
visit_types = get_savable_visit_types(privileged_user=True)
labels_set = product(request_statuses, visit_types)
for labels in labels_set:
_submitted_save_requests_gauge.labels(*labels).set(0)
labels_set = product(load_task_statuses, visit_types)
for labels in labels_set:
_accepted_save_requests_gauge.labels(*labels).set(0)
duration_load_task_statuses = (
SAVE_TASK_FAILED,
SAVE_TASK_SUCCEEDED,
)
for labels in product(duration_load_task_statuses, visit_types):
_accepted_save_requests_delay_gauge.labels(*labels).set(0)
for sor in SaveOriginRequest.objects.all():
if sor.status == SAVE_REQUEST_ACCEPTED:
_accepted_save_requests_gauge.labels(
load_task_status=sor.loading_task_status, visit_type=sor.visit_type,
).inc()
_submitted_save_requests_gauge.labels(
status=sor.status, visit_type=sor.visit_type
).inc()
if (
sor.loading_task_status in (SAVE_TASK_SUCCEEDED, SAVE_TASK_FAILED)
and sor.visit_date is not None
and sor.request_date is not None
):
delay = sor.visit_date.timestamp() - sor.request_date.timestamp()
_accepted_save_requests_delay_gauge.labels(
load_task_status=sor.loading_task_status, visit_type=sor.visit_type,
).inc(delay)
diff --git a/swh/web/common/typing.py b/swh/web/common/typing.py
index 1859510f..c715e090 100644
--- a/swh/web/common/typing.py
+++ b/swh/web/common/typing.py
@@ -1,258 +1,258 @@
# Copyright (C) 2020-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Any, Dict, List, Optional, TypeVar, Union
from typing_extensions import TypedDict
from django.http import QueryDict
from swh.core.api.classes import PagedResult as CorePagedResult
QueryParameters = Union[Dict[str, Any], QueryDict]
class OriginInfo(TypedDict):
url: str
"""URL of the origin"""
class OriginMetadataInfo(TypedDict):
url: str
"""URL of the origin"""
metadata: Dict[str, Any]
"""Origin metadata associated to the origin"""
class OriginVisitInfo(TypedDict):
date: str
"""date of the visit in iso format"""
formatted_date: str
"""formatted date of the visit"""
metadata: Dict[str, Any]
"""metadata associated to the visit"""
origin: str
"""visited origin URL"""
snapshot: str
"""snapshot identifier computed during the visit"""
status: str
"""status of the visit ("ongoing", "full" or "partial") """
type: str
"""visit type (git, hg, debian, ...)"""
url: str
"""URL to browse the snapshot"""
visit: int
"""visit identifier"""
class SnapshotBranchInfo(TypedDict):
date: Optional[str]
""""author date of branch heading revision"""
directory: Optional[str]
"""directory associated to branch heading revision"""
message: Optional[str]
"""message of branch heading revision"""
name: str
"""branch name"""
alias: bool
"""define if the branch is an alias"""
revision: str
"""branch heading revision"""
url: Optional[str]
"""optional browse URL (content, directory, ...) scoped to branch"""
class SnapshotReleaseInfo(TypedDict):
branch_name: str
"""branch name associated to release in snapshot"""
date: str
"""release date"""
directory: Optional[str]
"""optional directory associatd to the release"""
id: str
"""release identifier"""
message: str
"""release message"""
name: str
"""release name"""
alias: bool
"""define if the branch is an alias"""
target: str
"""release target"""
target_type: str
"""release target_type"""
url: Optional[str]
"""optional browse URL (content, directory, ...) scoped to release"""
class SnapshotContext(TypedDict):
branch: Optional[str]
"""optional branch name set when browsing snapshot in that scope"""
branch_alias: bool
"""indicates if the focused branch is an alias"""
branches: List[SnapshotBranchInfo]
"""list of snapshot branches (possibly truncated)"""
branches_url: str
"""snapshot branches list browse URL"""
is_empty: bool
"""indicates if the snapshot is empty"""
origin_info: Optional[OriginInfo]
"""optional origin info associated to the snapshot"""
origin_visits_url: Optional[str]
"""optional origin visits URL"""
query_params: QueryParameters
"""common query parameters when browsing snapshot content"""
release: Optional[str]
"""optional release name set when browsing snapshot in that scope"""
release_alias: bool
"""indicates if the focused release is an alias"""
release_id: Optional[str]
"""optional release identifier set when browsing snapshot in that scope"""
releases: List[SnapshotReleaseInfo]
"""list of snapshot releases (possibly truncated)"""
releases_url: str
"""snapshot releases list browse URL"""
revision_id: Optional[str]
"""optional revision identifier set when browsing snapshot in that scope"""
revision_info: Optional[Dict[str, Any]]
"""optional revision info set when browsing snapshot in that scope"""
root_directory: Optional[str]
"""optional root directory identifier set when browsing snapshot content"""
snapshot_id: str
"""snapshot identifier"""
snapshot_sizes: Dict[str, int]
"""snapshot sizes grouped by branch target type"""
snapshot_swhid: str
"""snapshot SWHID"""
url_args: Dict[str, Any]
"""common URL arguments when browsing snapshot content"""
visit_info: Optional[OriginVisitInfo]
"""optional origin visit info associated to the snapshot"""
class SWHObjectInfo(TypedDict):
object_type: str
object_id: str
class SWHIDContext(TypedDict, total=False):
origin: str
anchor: str
visit: str
path: str
lines: str
class SWHIDInfo(SWHObjectInfo):
swhid: str
swhid_url: str
context: SWHIDContext
swhid_with_context: Optional[str]
swhid_with_context_url: Optional[str]
class SWHObjectInfoMetadata(TypedDict, total=False):
origin_url: Optional[str]
visit_date: Optional[str]
visit_type: Optional[str]
class ContentMetadata(SWHObjectInfo, SWHObjectInfoMetadata):
sha1: str
sha1_git: str
sha256: str
blake2s256: str
content_url: str
mimetype: str
encoding: str
size: str
language: str
path: Optional[str]
filename: Optional[str]
directory: Optional[str]
root_directory: Optional[str]
revision: Optional[str]
release: Optional[str]
snapshot: Optional[str]
class DirectoryMetadata(SWHObjectInfo, SWHObjectInfoMetadata):
directory: str
nb_files: int
nb_dirs: int
sum_file_sizes: str
root_directory: Optional[str]
path: str
revision: Optional[str]
revision_found: Optional[bool]
release: Optional[str]
snapshot: Optional[str]
class ReleaseMetadata(SWHObjectInfo, SWHObjectInfoMetadata):
release: str
author: str
author_url: str
date: str
name: str
synthetic: bool
target: str
target_type: str
snapshot: Optional[str]
class RevisionMetadata(SWHObjectInfo, SWHObjectInfoMetadata):
revision: str
author: str
author_url: str
committer: str
committer_url: str
date: str
committer_date: str
directory: str
merge: bool
metadata: str
parents: List[str]
synthetic: bool
type: str
snapshot: Optional[str]
TResult = TypeVar("TResult")
PagedResult = CorePagedResult[TResult, str]
class SaveOriginRequestInfo(TypedDict):
id: int
"""Unique key"""
save_request_date: str
"""Date of the creation request"""
visit_type: str
"""Type of the visit"""
visit_status: Optional[str]
"""Status of the visit"""
origin_url: str
"""Origin to ingest"""
save_request_status: str
"""Status of the request"""
loading_task_id: Optional[int]
"""Identifier of the loading task in the scheduler if scheduled"""
visit_date: Optional[str]
"""End of the visit if terminated"""
save_task_status: str
"""Status of the scheduled task"""
class OriginExistenceCheckInfo(TypedDict):
origin_url: str
"""Origin to check"""
exists: bool
"""Does the url exist?"""
content_length: Optional[int]
"""content length of the artifact"""
last_modified: Optional[str]
- """Last modification time reported by the server"""
+ """Last modification time reported by the server (as iso8601 string)"""
diff --git a/swh/web/templates/misc/origin-save.html b/swh/web/templates/misc/origin-save.html
index af4a7fa9..6656bcac 100644
--- a/swh/web/templates/misc/origin-save.html
+++ b/swh/web/templates/misc/origin-save.html
@@ -1,129 +1,145 @@
{% extends "../layout.html" %}
{% comment %}
-Copyright (C) 2018-2019 The Software Heritage developers
+Copyright (C) 2018-2021 The Software Heritage developers
See the AUTHORS file at the top-level directory of this distribution
License: GNU Affero General Public License version 3, or any later version
See top-level LICENSE file for more information
{% endcomment %}
{% load render_bundle from webpack_loader %}
{% load static %}
{% block title %}{{ heading }} – Software Heritage archive{% endblock %}
{% block header %}
{% render_bundle 'save' %}
{% endblock %}
{% block navbar-content %}
Save code now
{% endblock %}
{% block content %}
You can contribute to extend the content of the Software Heritage archive by submitting an origin
save request. To do so, fill the required info in the form below:
Origin url: the url of the remote repository for the software origin.
In order to avoid saving errors from Software Heritage, you should provide the clone/checkout url
as given by the provider hosting the software origin. It can easily be found in the
web interface used to browse the software origin. For instance, if you want to save a git
origin into the archive, you should check that the command $ git clone <origin_url>
does not return an error before submitting a request.
Once submitted, your save request can either be:
accepted: a visit to the provided origin will then be scheduled by Software Heritage in order to
load its content into the archive as soon as possible
rejected: the provided origin url is blacklisted and no visit will be scheduled
put in pending state: a manual review will then be performed in order to determine if the
origin can be safely loaded or not into the archive
Once a save request has been accepted, you can follow its current status in the
submitted save requests list.
If you submitted requests while authenticated, you will be able
to only display your own requests.
Date
Type
Url
Request
Status
Info
-{% endblock %}
\ No newline at end of file
+{% endblock %}
diff --git a/swh/web/tests/api/views/test_origin_save.py b/swh/web/tests/api/views/test_origin_save.py
index 9573d33d..276a2c7d 100644
--- a/swh/web/tests/api/views/test_origin_save.py
+++ b/swh/web/tests/api/views/test_origin_save.py
@@ -1,530 +1,600 @@
# Copyright (C) 2018-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timedelta
import pytest
from django.contrib.auth.models import User
from django.core.exceptions import ObjectDoesNotExist
from django.utils import timezone
from swh.web.auth.utils import SWH_AMBASSADOR_PERMISSION
from swh.web.common.models import (
SAVE_REQUEST_ACCEPTED,
SAVE_REQUEST_PENDING,
SAVE_REQUEST_REJECTED,
SAVE_TASK_FAILED,
SAVE_TASK_NOT_CREATED,
SAVE_TASK_NOT_YET_SCHEDULED,
SAVE_TASK_SCHEDULED,
SAVE_TASK_SUCCEEDED,
VISIT_STATUS_FAILED,
VISIT_STATUS_FULL,
SaveAuthorizedOrigin,
SaveOriginRequest,
SaveUnauthorizedOrigin,
)
from swh.web.common.typing import OriginExistenceCheckInfo
from swh.web.common.utils import reverse
from swh.web.settings.tests import save_origin_rate_post
from swh.web.tests.utils import (
check_api_get_responses,
check_api_post_response,
check_api_post_responses,
)
pytestmark = pytest.mark.django_db
@pytest.fixture(autouse=True)
def populated_db():
    """Seed the database with authorized and unauthorized origin prefixes.

    Runs automatically before each test in this module.
    """
    # The original statements ended with stray trailing commas, wrapping the
    # created objects in one-element tuples; harmless at runtime but
    # misleading, so they are removed here.
    SaveAuthorizedOrigin.objects.create(url="https://github.com/")
    SaveAuthorizedOrigin.objects.create(url="https://gitlab.com/")
    SaveUnauthorizedOrigin.objects.create(url="https://github.com/user/illegal_repo")
    SaveUnauthorizedOrigin.objects.create(url="https://gitlab.com/user_to_exclude")
def test_invalid_visit_type(api_client):
    """An unsupported visit type must be rejected with HTTP 400."""
    save_url = reverse(
        "api-1-save-origin",
        url_args={
            "visit_type": "foo",
            "origin_url": "https://github.com/torvalds/linux",
        },
    )
    check_api_get_responses(api_client, save_url, status_code=400)
def test_invalid_origin_url(api_client):
    """A malformed origin URL must be rejected with HTTP 400."""
    save_url = reverse(
        "api-1-save-origin", url_args={"visit_type": "git", "origin_url": "bar"}
    )
    check_api_get_responses(api_client, save_url, status_code=400)
def check_created_save_request_status(
    api_client,
    mocker,
    origin_url,
    expected_request_status,
    scheduler_task_status=None,
    scheduler_task_run_status=None,
    expected_task_status=None,
    visit_date=None,
):
    # Helper: POST a "save code now" request for ``origin_url`` and assert
    # both the save request status and the loading task status returned by
    # the API. The scheduler and the origin existence check are mocked so
    # only the swh-web side of the workflow is exercised.
    mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
    mock_origin_exists = mocker.patch("swh.web.common.origin_save.origin_exists")
    # Pretend the origin URL resolves (HEAD succeeds) but carries no metadata.
    mock_origin_exists.return_value = OriginExistenceCheckInfo(
        origin_url=origin_url, exists=True, last_modified=None, content_length=None
    )
    # When no scheduler task status is given, the scheduler knows no task yet.
    if scheduler_task_status is None:
        mock_scheduler.get_tasks.return_value = []
    else:
        mock_scheduler.get_tasks.return_value = [
            {
                "priority": "high",
                "policy": "oneshot",
                "type": "load-git",
                "arguments": {"kwargs": {"repo_url": origin_url}, "args": []},
                "status": scheduler_task_status,
                "id": 1,
            }
        ]
    # Same for the task runs: empty unless a run status is requested.
    if scheduler_task_run_status is None:
        mock_scheduler.get_task_runs.return_value = []
    else:
        mock_scheduler.get_task_runs.return_value = [
            {
                "backend_id": "f00c712c-e820-41ce-a07c-9bf8df914205",
                "ended": datetime.now(tz=timezone.utc) + timedelta(minutes=5),
                "id": 1,
                "metadata": {},
                "scheduled": datetime.now(tz=timezone.utc),
                "started": None,
                "status": scheduler_task_run_status,
                "task": 1,
            }
        ]
    # A newly created loading task always starts in the not-scheduled state.
    mock_scheduler.create_tasks.return_value = [
        {
            "priority": "high",
            "policy": "oneshot",
            "type": "load-git",
            "arguments": {"kwargs": {"repo_url": origin_url}, "args": []},
            "status": "next_run_not_scheduled",
            "id": 1,
        }
    ]
    url = reverse(
        "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}
    )
    mock_visit_date = mocker.patch(
        ("swh.web.common.origin_save._get_visit_info_for_save_request")
    )
    mock_visit_date.return_value = (visit_date, None)
    if expected_request_status != SAVE_REQUEST_REJECTED:
        response = check_api_post_responses(api_client, url, data=None, status_code=200)
        assert response.data["save_request_status"] == expected_request_status
        assert response.data["save_task_status"] == expected_task_status
    else:
        # Blacklisted origins are rejected with a 403 Forbidden.
        check_api_post_responses(api_client, url, data=None, status_code=403)
def check_save_request_status(
    api_client,
    mocker,
    origin_url,
    expected_request_status,
    expected_task_status,
    scheduler_task_status="next_run_not_scheduled",
    scheduler_task_run_status=None,
    visit_date=None,
    visit_status=None,
):
    # Helper: GET the existing save requests for ``origin_url`` and assert
    # the request status, loading task status and visit status reported by
    # the API, with the scheduler state fully mocked.
    mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
    mock_scheduler.get_tasks.return_value = [
        {
            "priority": "high",
            "policy": "oneshot",
            "type": "load-git",
            "arguments": {"kwargs": {"repo_url": origin_url}, "args": []},
            "status": scheduler_task_status,
            "id": 1,
        }
    ]
    # Task runs are empty unless a run status is requested by the caller.
    if scheduler_task_run_status is None:
        mock_scheduler.get_task_runs.return_value = []
    else:
        mock_scheduler.get_task_runs.return_value = [
            {
                "backend_id": "f00c712c-e820-41ce-a07c-9bf8df914205",
                "ended": datetime.now(tz=timezone.utc) + timedelta(minutes=5),
                "id": 1,
                "metadata": {},
                "scheduled": datetime.now(tz=timezone.utc),
                "started": None,
                "status": scheduler_task_run_status,
                "task": 1,
            }
        ]
    url = reverse(
        "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url}
    )
    mock_visit_date = mocker.patch(
        ("swh.web.common.origin_save._get_visit_info_for_save_request")
    )
    mock_visit_date.return_value = (visit_date, visit_status)
    response = check_api_get_responses(api_client, url, status_code=200)
    save_request_data = response.data[0]
    assert save_request_data["save_request_status"] == expected_request_status
    assert save_request_data["save_task_status"] == expected_task_status
    assert save_request_data["visit_status"] == visit_status
    # Check that save task status is still available when
    # the scheduler task has been archived
    mock_scheduler.get_tasks.return_value = []
    response = check_api_get_responses(api_client, url, status_code=200)
    save_request_data = response.data[0]
    assert save_request_data["save_task_status"] == expected_task_status
    assert save_request_data["visit_status"] == visit_status
def test_save_request_rejected(api_client, mocker):
    """A blacklisted origin is rejected and no loading task gets created."""
    blacklisted_origin = "https://github.com/user/illegal_repo"
    check_created_save_request_status(
        api_client,
        mocker,
        blacklisted_origin,
        expected_request_status=SAVE_REQUEST_REJECTED,
    )
    check_save_request_status(
        api_client,
        mocker,
        blacklisted_origin,
        expected_request_status=SAVE_REQUEST_REJECTED,
        expected_task_status=SAVE_TASK_NOT_CREATED,
    )
def test_save_request_pending(api_client, mocker):
    """An origin hosted on an unknown forge is put in pending state and no
    loading task is created."""
    pending_origin = "https://unkwownforge.com/user/repo"
    # Both the creation check and the status check expect the same outcome.
    for checker in (check_created_save_request_status, check_save_request_status):
        checker(
            api_client,
            mocker,
            pending_origin,
            expected_request_status=SAVE_REQUEST_PENDING,
            expected_task_status=SAVE_TASK_NOT_CREATED,
        )
def test_save_request_succeed(api_client, mocker):
origin_url = "https://github.com/Kitware/CMake"
check_created_save_request_status(
api_client,
mocker,
origin_url,
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
)
check_save_request_status(
api_client,
mocker,
origin_url,
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_SCHEDULED,
scheduler_task_status="next_run_scheduled",
scheduler_task_run_status="scheduled",
)
check_save_request_status(
api_client,
mocker,
origin_url,
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_SUCCEEDED,
scheduler_task_status="completed",
scheduler_task_run_status="eventful",
visit_date=None,
)
visit_date = datetime.now(tz=timezone.utc) + timedelta(hours=1)
check_save_request_status(
api_client,
mocker,
origin_url,
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_SUCCEEDED,
scheduler_task_status="completed",
scheduler_task_run_status="eventful",
visit_date=visit_date,
visit_status=VISIT_STATUS_FULL,
)
def test_save_request_failed(api_client, mocker):
origin_url = "https://gitlab.com/inkscape/inkscape"
check_created_save_request_status(
api_client,
mocker,
origin_url,
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
)
check_save_request_status(
api_client,
mocker,
origin_url,
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_SCHEDULED,
scheduler_task_status="next_run_scheduled",
scheduler_task_run_status="scheduled",
)
check_save_request_status(
api_client,
mocker,
origin_url,
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_FAILED,
scheduler_task_status="disabled",
scheduler_task_run_status="failed",
visit_status=VISIT_STATUS_FAILED,
)
def test_create_save_request_only_when_needed(api_client, mocker):
origin_url = "https://github.com/webpack/webpack"
SaveOriginRequest.objects.create(
visit_type="git",
origin_url=origin_url,
status=SAVE_REQUEST_ACCEPTED,
loading_task_id=56,
)
check_created_save_request_status(
api_client,
mocker,
origin_url,
scheduler_task_status="next_run_not_scheduled",
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
)
sors = list(
SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url)
)
assert len(sors) == 1
check_created_save_request_status(
api_client,
mocker,
origin_url,
scheduler_task_status="next_run_scheduled",
scheduler_task_run_status="scheduled",
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_SCHEDULED,
)
sors = list(
SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url)
)
assert len(sors) == 1
visit_date = datetime.now(tz=timezone.utc) + timedelta(hours=1)
check_created_save_request_status(
api_client,
mocker,
origin_url,
scheduler_task_status="completed",
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
visit_date=visit_date,
)
sors = list(
SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url)
)
# check_api_post_responses sends two POST requests to check YAML and JSON response
assert len(sors) == 3
check_created_save_request_status(
api_client,
mocker,
origin_url,
scheduler_task_status="disabled",
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
)
sors = list(
SaveOriginRequest.objects.filter(visit_type="git", origin_url=origin_url)
)
assert len(sors) == 5
def test_get_save_requests_unknown_origin(api_client):
    """Listing save requests for a never-seen origin yields a 404."""
    unknown_origin_url = "https://gitlab.com/foo/bar"
    list_url = reverse(
        "api-1-save-origin",
        url_args={"visit_type": "git", "origin_url": unknown_origin_url},
    )
    response = check_api_get_responses(api_client, list_url, status_code=404)
    expected_reason = (
        f"No save requests found for visit of type git "
        f"on origin with url {unknown_origin_url}."
    )
    assert response.data == {
        "exception": "NotFoundExc",
        "reason": expected_reason,
    }
_visit_type = "git"
_origin_url = "https://github.com/python/cpython"
def test_save_requests_rate_limit(api_client, mocker):
create_save_origin_request = mocker.patch(
"swh.web.api.views.origin_save.create_save_origin_request"
)
def _save_request_dict(*args, **kwargs):
return {
"id": 1,
"visit_type": _visit_type,
"origin_url": _origin_url,
"save_request_date": datetime.now().isoformat(),
"save_request_status": SAVE_REQUEST_ACCEPTED,
"save_task_status": SAVE_TASK_NOT_YET_SCHEDULED,
"visit_date": None,
"visit_status": None,
}
create_save_origin_request.side_effect = _save_request_dict
url = reverse(
"api-1-save-origin",
url_args={"visit_type": _visit_type, "origin_url": _origin_url},
)
for _ in range(save_origin_rate_post):
check_api_post_response(api_client, url, status_code=200)
check_api_post_response(api_client, url, status_code=429)
def test_save_request_form_server_error(api_client, mocker):
    """A server-side failure while creating the request surfaces as a 500."""
    mocker.patch(
        "swh.web.api.views.origin_save.create_save_origin_request",
        side_effect=Exception("Server error"),
    )
    endpoint = reverse(
        "api-1-save-origin",
        url_args={"visit_type": _visit_type, "origin_url": _origin_url},
    )
    check_api_post_responses(api_client, endpoint, status_code=500)
@pytest.fixture
def origin_to_review():
return "https://git.example.org/user/project"
def test_create_save_request_pending_review_anonymous_user(
api_client, origin_to_review
):
url = reverse(
"api-1-save-origin",
url_args={"visit_type": "git", "origin_url": origin_to_review},
)
response = check_api_post_responses(api_client, url, status_code=200)
assert response.data["save_request_status"] == SAVE_REQUEST_PENDING
with pytest.raises(ObjectDoesNotExist):
SaveAuthorizedOrigin.objects.get(url=origin_to_review)
def test_create_save_request_bundle_with_ambassador_user(
    api_client, origin_to_review, keycloak_oidc, mocker, requests_mock,
):
    """An ambassador user can submit a privileged "bundle" save request; the
    request is accepted and, as a side effect, the origin gets authorized.
    """
    keycloak_oidc.realm_permissions = [SWH_AMBASSADOR_PERMISSION]
    oidc_profile = keycloak_oidc.login()
    api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}")

    # renamed from camelCase "originUrl" to match the module's snake_case style
    origin_url = "https://somewhere.org/simple"
    artifact_version = "1.2.3"
    artifact_filename = f"tarball-{artifact_version}.tar.gz"
    artifact_url = f"{origin_url}/{artifact_filename}"
    content_length = "100"
    last_modified = "Sun, 21 Aug 2011 16:26:32 GMT"

    # the artifact existence check (HEAD request) succeeds with some metadata
    requests_mock.head(
        artifact_url,
        status_code=200,
        headers={"content-length": content_length, "last-modified": last_modified},
    )

    mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
    mock_scheduler.get_task_runs.return_value = []
    mock_scheduler.create_tasks.return_value = [
        {
            "id": 10,
            "priority": "high",
            "policy": "oneshot",
            "status": "next_run_not_scheduled",
            "type": "load-archive-files",
            "arguments": {
                "args": [],
                "kwargs": {
                    "url": origin_url,
                    "artifacts": [
                        {
                            "url": artifact_url,
                            "filename": artifact_filename,
                            "version": artifact_version,
                            "time": last_modified,
                            "length": content_length,
                        }
                    ],
                },
            },
        },
    ]

    # then
    url = reverse(
        "api-1-save-origin",
        url_args={"visit_type": "bundle", "origin_url": origin_url},
    )

    response = check_api_post_response(
        api_client,
        url,
        status_code=200,
        data={
            "artifact_url": artifact_url,
            "artifact_filename": artifact_filename,
            "artifact_version": artifact_version,
        },
    )

    assert response.data["save_request_status"] == SAVE_REQUEST_ACCEPTED

    # the origin is now authorized for subsequent requests
    assert SaveAuthorizedOrigin.objects.get(url=origin_url)

+def test_create_save_request_bundle_accepted_ambassador_user(
api_client, origin_to_review, keycloak_oidc, mocker
):
keycloak_oidc.realm_permissions = [SWH_AMBASSADOR_PERMISSION]
oidc_profile = keycloak_oidc.login()
api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}")
check_created_save_request_status(
api_client,
mocker,
origin_to_review,
expected_request_status=SAVE_REQUEST_ACCEPTED,
expected_task_status=SAVE_TASK_NOT_YET_SCHEDULED,
)
assert SaveAuthorizedOrigin.objects.get(url=origin_to_review)
def test_create_save_request_anonymous_user_no_user_id(api_client):
    """Save requests made anonymously are stored without any user ids."""
    origin_url = "https://some.git.hosters/user/repo"
    request_url = reverse(
        "api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url},
    )
    check_api_post_responses(api_client, request_url, status_code=200)
    assert SaveOriginRequest.objects.get(origin_url=origin_url).user_ids is None
def test_create_save_request_authenticated_user_id(
api_client, origin_to_review, keycloak_oidc, mocker
):
oidc_profile = keycloak_oidc.login()
api_client.credentials(HTTP_AUTHORIZATION=f"Bearer {oidc_profile['refresh_token']}")
origin_url = "https://some.git.hosters/user/repo2"
url = reverse(
"api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url},
)
response = check_api_post_response(api_client, url, status_code=200)
assert response.wsgi_request.user.id is not None
user_id = str(response.wsgi_request.user.id)
sor = SaveOriginRequest.objects.get(user_ids=f'"{user_id}"')
assert sor.user_ids == f'"{user_id}"'
def test_create_pending_save_request_multiple_authenticated_users(api_client):
origin_url = "https://some.git.hosters/user/repo3"
first_user = User.objects.create_user(username="first_user", password="")
second_user = User.objects.create_user(username="second_user", password="")
url = reverse(
"api-1-save-origin", url_args={"visit_type": "git", "origin_url": origin_url},
)
api_client.force_login(first_user)
check_api_post_response(api_client, url, status_code=200)
api_client.force_login(second_user)
check_api_post_response(api_client, url, status_code=200)
assert SaveOriginRequest.objects.get(user_ids__contains=f'"{first_user.id}"')
assert SaveOriginRequest.objects.get(user_ids__contains=f'"{second_user.id}"')
diff --git a/swh/web/tests/common/test_origin_save.py b/swh/web/tests/common/test_origin_save.py
index 064a398c..52c2a412 100644
--- a/swh/web/tests/common/test_origin_save.py
+++ b/swh/web/tests/common/test_origin_save.py
@@ -1,524 +1,549 @@
# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timedelta, timezone
from functools import partial
import re
from typing import Optional
import pytest
import requests
from swh.core.pytest_plugin import get_response_cb
from swh.web.common.exc import BadInputExc
from swh.web.common.models import (
SAVE_REQUEST_ACCEPTED,
SAVE_TASK_FAILED,
SAVE_TASK_RUNNING,
SAVE_TASK_SCHEDULED,
SAVE_TASK_SUCCEEDED,
VISIT_STATUS_FULL,
SaveOriginRequest,
)
from swh.web.common.origin_save import (
_check_origin_exists,
_check_visit_type_savable,
_visit_type_task,
_visit_type_task_privileged,
get_savable_visit_types,
get_save_origin_requests,
get_save_origin_task_info,
origin_exists,
refresh_save_origin_request_statuses,
)
from swh.web.common.typing import (
OriginExistenceCheckInfo,
OriginVisitInfo,
SaveOriginRequestInfo,
)
from swh.web.config import get_config
_es_url = "http://esnode1.internal.softwareheritage.org:9200"
_es_workers_index_url = "%s/swh_workers-*" % _es_url
_origin_url = "https://gitlab.com/inkscape/inkscape"
_visit_type = "git"
_task_id = 203525448
@pytest.fixture(autouse=True)
def requests_mock_datadir(datadir, requests_mock_datadir):
"""Override default behavior to deal with post method"""
cb = partial(get_response_cb, datadir=datadir)
requests_mock_datadir.post(re.compile("https?://"), body=cb)
return requests_mock_datadir
@pytest.mark.django_db
def test_get_save_origin_archived_task_info(mocker):
_get_save_origin_task_info_test(mocker, task_archived=True)
@pytest.mark.django_db
def test_get_save_origin_task_full_info_with_es(mocker):
_get_save_origin_task_info_test(mocker, es_available=True)
@pytest.mark.django_db
def test_get_save_origin_task_info_with_es(mocker):
_get_save_origin_task_info_test(mocker, es_available=True, full_info=False)
@pytest.mark.django_db
def test_get_save_origin_task_info_without_es(mocker):
_get_save_origin_task_info_test(mocker, es_available=False)
def _mock_scheduler(
mocker, task_status="completed", task_run_status="eventful", task_archived=False
):
mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler")
task = {
"arguments": {"args": [], "kwargs": {"repo_url": _origin_url},},
"current_interval": timedelta(days=64),
"id": _task_id,
"next_run": datetime.now(tz=timezone.utc) + timedelta(days=64),
"policy": "oneshot",
"priority": "high",
"retries_left": 0,
"status": task_status,
"type": "load-git",
}
mock_scheduler.get_tasks.return_value = [dict(task) if not task_archived else None]
task_run = {
"backend_id": "f00c712c-e820-41ce-a07c-9bf8df914205",
"ended": datetime.now(tz=timezone.utc) + timedelta(minutes=5),
"id": 654270631,
"metadata": {},
"scheduled": datetime.now(tz=timezone.utc),
"started": None,
"status": task_run_status,
"task": _task_id,
}
mock_scheduler.get_task_runs.return_value = [
dict(task_run) if not task_archived else None
]
return task, task_run
@pytest.mark.parametrize(
"wrong_type,privileged_user",
[
("dummy", True),
("dumb", False),
("bundle", False), # when no privilege, this is rejected
],
)
def test__check_visit_type_savable(wrong_type, privileged_user):
with pytest.raises(BadInputExc, match="Allowed types"):
_check_visit_type_savable(wrong_type, privileged_user)
# when privileged_user, the following is accepted though
_check_visit_type_savable("bundle", True)
def test_get_savable_visit_types():
default_list = list(_visit_type_task.keys())
assert set(get_savable_visit_types()) == set(default_list)
privileged_list = default_list.copy()
privileged_list += list(_visit_type_task_privileged.keys())
assert set(get_savable_visit_types(privileged_user=True)) == set(privileged_list)
def _get_save_origin_task_info_test(
mocker, task_archived=False, es_available=True, full_info=True
):
swh_web_config = get_config()
if es_available:
swh_web_config.update({"es_workers_index_url": _es_workers_index_url})
else:
swh_web_config.update({"es_workers_index_url": ""})
sor = SaveOriginRequest.objects.create(
request_date=datetime.now(tz=timezone.utc),
visit_type=_visit_type,
origin_url="https://gitlab.com/inkscape/inkscape",
status=SAVE_REQUEST_ACCEPTED,
visit_date=datetime.now(tz=timezone.utc) + timedelta(hours=1),
loading_task_id=_task_id,
)
task, task_run = _mock_scheduler(mocker, task_archived=task_archived)
es_response = requests.post("%s/_search" % _es_workers_index_url).json()
task_exec_data = es_response["hits"]["hits"][-1]["_source"]
sor_task_info = get_save_origin_task_info(sor.id, full_info=full_info)
expected_result = (
{
"type": task["type"],
"arguments": task["arguments"],
"id": task["id"],
"backend_id": task_run["backend_id"],
"scheduled": task_run["scheduled"],
"started": task_run["started"],
"ended": task_run["ended"],
"status": task_run["status"],
"visit_status": sor.visit_status,
}
if not task_archived
else {}
)
if es_available and not task_archived:
expected_result.update(
{
"message": task_exec_data["message"],
"name": task_exec_data["swh_task_name"],
"worker": task_exec_data["hostname"],
}
)
if not full_info:
expected_result.pop("id", None)
expected_result.pop("backend_id", None)
expected_result.pop("worker", None)
if "message" in expected_result:
message = ""
message_lines = expected_result["message"].split("\n")
for line in message_lines:
if line.startswith("Traceback"):
break
message += f"{line}\n"
message += message_lines[-1]
expected_result["message"] = message
assert sor_task_info == expected_result
@pytest.mark.django_db
def test_get_save_origin_requests_find_visit_date(mocker):
# create a save request
SaveOriginRequest.objects.create(
request_date=datetime.now(tz=timezone.utc),
visit_type=_visit_type,
origin_url=_origin_url,
status=SAVE_REQUEST_ACCEPTED,
visit_date=None,
loading_task_id=_task_id,
)
# mock scheduler and archive
_mock_scheduler(mocker)
mock_archive = mocker.patch("swh.web.common.origin_save.archive")
mock_archive.lookup_origin.return_value = {"url": _origin_url}
mock_get_origin_visits = mocker.patch(
"swh.web.common.origin_save.get_origin_visits"
)
# create a visit for the save request
visit_date = datetime.now(tz=timezone.utc).isoformat()
visit_info = OriginVisitInfo(
date=visit_date,
formatted_date="",
metadata={},
origin=_origin_url,
snapshot="",
status="full",
type=_visit_type,
url="",
visit=34,
)
mock_get_origin_visits.return_value = [visit_info]
# check visit date has been correctly found
sors = get_save_origin_requests(_visit_type, _origin_url)
assert len(sors) == 1
assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED
assert sors[0]["visit_date"] == visit_date
mock_get_origin_visits.assert_called_once()
# check visit is not searched again when it has been found
get_save_origin_requests(_visit_type, _origin_url)
mock_get_origin_visits.assert_called_once()
# check visit date are not searched for save requests older than
# one month
sor = SaveOriginRequest.objects.create(
visit_type=_visit_type,
origin_url=_origin_url,
status=SAVE_REQUEST_ACCEPTED,
loading_task_id=_task_id,
visit_date=None,
)
sor.request_date = datetime.now(tz=timezone.utc) - timedelta(days=31)
sor.save()
_mock_scheduler(mocker, task_status="disabled", task_run_status="failed")
sors = get_save_origin_requests(_visit_type, _origin_url)
assert len(sors) == 2
assert sors[0]["save_task_status"] == SAVE_TASK_FAILED
assert sors[0]["visit_date"] is None
mock_get_origin_visits.assert_called_once()
def _get_save_origin_requests(
mocker, load_status, visit_status, request_date: Optional[datetime] = None
):
"""Wrapper around the get_origin_save_origin_request call.
"""
SaveOriginRequest.objects.create(
request_date=datetime.now(tz=timezone.utc),
visit_type=_visit_type,
visit_status=visit_status,
origin_url=_origin_url,
status=SAVE_REQUEST_ACCEPTED,
visit_date=None,
loading_task_id=_task_id,
)
# mock scheduler and archives
_mock_scheduler(
mocker, task_status="next_run_scheduled", task_run_status=load_status
)
mock_archive = mocker.patch("swh.web.common.origin_save.archive")
mock_archive.lookup_origin.return_value = {"url": _origin_url}
mock_get_origin_visits = mocker.patch(
"swh.web.common.origin_save.get_origin_visits"
)
# create a visit for the save request with status created
visit_date = datetime.now(tz=timezone.utc).isoformat()
visit_info = OriginVisitInfo(
date=visit_date,
formatted_date="",
metadata={},
origin=_origin_url,
snapshot="", # make mypy happy
status=visit_status,
type=_visit_type,
url="",
visit=34,
)
mock_get_origin_visits.return_value = [visit_info]
sors = get_save_origin_requests(_visit_type, _origin_url)
mock_get_origin_visits.assert_called_once()
return sors
@pytest.mark.parametrize("visit_date", [None, "some-date"])
def test_from_save_origin_request_to_save_request_info_dict(visit_date):
"""Ensure save request to json serializable dict is fine
"""
request_date = datetime.now(tz=timezone.utc)
_visit_date = request_date + timedelta(minutes=5) if visit_date else None
request_date = datetime.now(tz=timezone.utc)
sor = SaveOriginRequest(
request_date=request_date,
visit_type=_visit_type,
visit_status=VISIT_STATUS_FULL,
origin_url=_origin_url,
status=SAVE_REQUEST_ACCEPTED,
loading_task_status=None,
visit_date=_visit_date,
loading_task_id=1,
)
assert sor.to_dict() == SaveOriginRequestInfo(
id=sor.id,
origin_url=sor.origin_url,
visit_type=sor.visit_type,
save_request_date=sor.request_date.isoformat(),
save_request_status=sor.status,
save_task_status=sor.loading_task_status,
visit_status=sor.visit_status,
visit_date=_visit_date.isoformat() if _visit_date else None,
loading_task_id=sor.loading_task_id,
)
def test__check_origin_exists_404(requests_mock):
    """A 404 on the origin URL must raise BadInputExc."""
    missing_url = "https://example.org/some-inexistant-url"
    requests_mock.head(missing_url, status_code=404)
    with pytest.raises(BadInputExc, match="not exist"):
        _check_origin_exists(missing_url)
+@pytest.mark.parametrize("invalid_origin", [None, ""])
+def test__check_origin_invalid_input(invalid_origin):
+ with pytest.raises(BadInputExc, match="must be set"):
+ _check_origin_exists(invalid_origin)
+
+
def test__check_origin_exists_200(requests_mock):
url = "https://example.org/url"
requests_mock.head(url, status_code=200)
# passes the check
- _check_origin_exists(url)
+ actual_metadata = _check_origin_exists(url)
+
+ # and we actually may have retrieved some metadata on the origin
+ assert actual_metadata == origin_exists(url)
def test_origin_exists_404(requests_mock):
"""Origin which does not exist should be reported as inexistent"""
url_ko = "https://example.org/some-inexistant-url"
requests_mock.head(url_ko, status_code=404)
actual_result = origin_exists(url_ko)
assert actual_result == OriginExistenceCheckInfo(
origin_url=url_ko, exists=False, last_modified=None, content_length=None,
)
def test_origin_exists_200_no_data(requests_mock):
"""Existing origin should be reported as such (no extra information)"""
url = "http://example.org/real-url"
requests_mock.head(
url, status_code=200,
)
actual_result = origin_exists(url)
assert actual_result == OriginExistenceCheckInfo(
origin_url=url, exists=True, last_modified=None, content_length=None,
)
def test_origin_exists_200_with_data(requests_mock):
"""Existing origin should be reported as such (+ extra information)"""
url = "http://example.org/real-url"
requests_mock.head(
url,
status_code=200,
headers={
"content-length": "10",
"last-modified": "Sun, 21 Aug 2011 16:26:32 GMT",
},
)
actual_result = origin_exists(url)
assert actual_result == OriginExistenceCheckInfo(
origin_url=url,
exists=True,
content_length=10,
- last_modified="Sun, 21 Aug 2011 16:26:32 GMT",
+ last_modified="2011-08-21T16:26:32",
+ )
+
+
+def test_origin_exists_200_with_data_unexpected_date_format(requests_mock):
+ """Existing origin should be ok, unexpected last modif time result in no time"""
+ url = "http://example.org/real-url2"
+ # this is parsable but not as expected
+ unexpected_format_date = "Sun, 21 Aug 2021 16:26:32"
+ requests_mock.head(
+ url, status_code=200, headers={"last-modified": unexpected_format_date,},
+ )
+
+ actual_result = origin_exists(url)
+ # so the resulting date is None
+ assert actual_result == OriginExistenceCheckInfo(
+ origin_url=url, exists=True, content_length=None, last_modified=None,
)
@pytest.mark.django_db
@pytest.mark.parametrize("visit_status", ["created", "ongoing",])
def test_get_save_origin_requests_no_visit_date_found(mocker, visit_status):
    """Visits still in a non-terminal status ("created", "ongoing") have no
    visit date yet and their save task is reported as running.
    """
    sors = _get_save_origin_requests(
        mocker, load_status="scheduled", visit_status=visit_status,
    )
    # check no visit date has been found
    assert len(sors) == 1
    assert sors[0]["save_task_status"] == SAVE_TASK_RUNNING
    assert sors[0]["visit_date"] is None
    assert sors[0]["visit_status"] == visit_status
@pytest.mark.django_db
@pytest.mark.parametrize("visit_status", ["not_found", "failed",])
def test_get_save_origin_requests_no_failed_status_override(mocker, visit_status):
"""Uneventful visits with failed statuses (failed, not found) are marked as failed
"""
sors = _get_save_origin_requests(
mocker, load_status="uneventful", visit_status=visit_status
)
assert len(sors) == 1
assert sors[0]["save_task_status"] == SAVE_TASK_FAILED
visit_date = sors[0]["visit_date"]
if visit_status == "failed":
assert visit_date is None
else:
assert visit_date is not None
sors = get_save_origin_requests(_visit_type, _origin_url)
assert len(sors) == 1
assert sors[0]["save_task_status"] == SAVE_TASK_FAILED
assert sors[0]["visit_status"] == visit_status
@pytest.mark.django_db
@pytest.mark.parametrize(
"load_status,visit_status",
[("eventful", "full"), ("eventful", "partial"), ("uneventful", "partial"),],
)
def test_get_visit_info_for_save_request_succeeded(mocker, load_status, visit_status):
"""Nominal scenario, below 30 days, returns something"""
sors = _get_save_origin_requests(
mocker, load_status=load_status, visit_status=visit_status
)
assert len(sors) == 1
assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED
assert sors[0]["visit_date"] is not None
assert sors[0]["visit_status"] == visit_status
sors = get_save_origin_requests(_visit_type, _origin_url)
assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED
assert sors[0]["visit_status"] == visit_status
@pytest.mark.django_db
@pytest.mark.parametrize("load_status", ["eventful", "uneventful",])
def test_get_visit_info_incomplete_visit_still_successful(mocker, load_status):
"""Incomplete visit information, yet the task is considered ok
"""
sors = _get_save_origin_requests(
mocker, load_status=load_status, visit_status=None,
)
assert len(sors) == 1
assert sors[0]["save_task_status"] == SAVE_TASK_SUCCEEDED
assert sors[0]["visit_date"] is None
assert sors[0]["visit_status"] is None
# nothing to refresh so nothing to return
assert len(refresh_save_origin_request_statuses()) == 0
@pytest.mark.django_db
def test_refresh_save_request_statuses(mocker, api_client):
    """Refresh filters non-terminal save origins requests and update if changes
    """
    sors = _get_save_origin_requests(
        mocker, load_status=SAVE_TASK_SCHEDULED, visit_status=None,
    )
    assert len(sors) == 1
    # no changes so refresh does detect the entry but does nothing
    sors = refresh_save_origin_request_statuses()
    assert len(sors) == 1
    for sor in sors:
        # as it turns out, in this test, this won't update anything as no new status got
        # returned by the scheduler
        assert sor["save_task_status"] == SAVE_TASK_SCHEDULED
    # make the scheduler return eventful for that task
    _mock_scheduler(mocker)
    # Detected entry, this time it should be updated
    sors = refresh_save_origin_request_statuses()
    assert len(sors) == 1
    for sor in sors:
        # the scheduler now reports the task as eventful, so the refresh
        # updates the save task status to succeeded
        assert sor["save_task_status"] == SAVE_TASK_SUCCEEDED